geno <- read_delim("tables/TableS1_genotypes.tsv")
pheno <- read_delim("tables/TableS2_phenotypes_metadata.tsv")
cipro_antibiogram <- full_join(geno,pheno)wt_colours <- c(`non-wt (I/R)`="IndianRed", `wt (S)`= "LightBlue")
res_colours <- c("I"="grey", "R"="IndianRed", "S"="LightBlue", "NWT"="grey")
res_colours2 <- c("I"="black", "R"="IndianRed", "S"="LightBlue", "NWT"="black")
res_colours3 <- c("NWT I"="#78638a", "NWT R"="IndianRed", "WT S"="LightBlue")linreg_details <- function(model) {
ci <- confint(model) %>% as_tibble(rownames="Determinant") %>%
rename(ci.lower=`2.5 %`, ci.upper=`97.5 %`)
model_summary <- summary(model)$coef %>%
as_tibble(rownames="Determinant") %>%
rename(est=Estimate, pval=`Pr(>|t|)`) %>%
select(Determinant, est, pval) %>% left_join(ci)
return(model_summary)
}cat_agree <- function(pred,truth) {
xtabs <- table(pred,factor(truth, levels = 0:1))
return((xtabs[1,1]+xtabs[2,2])/sum(xtabs))
}
ppv <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[2,2]/sum(xtabs[2,]))
}
npv <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[1,1]/sum(xtabs[1,]))
}
#sens <- function(pred,truth) {
# xtabs <- table(pred,truth)
# return(xtabs[2,2]/(sum(xtabs[,2])))
#}
sens <- function(pred,truth) {
xtabs <- table(pred,factor(truth, levels = 0:1))
return(xtabs[2,2]/(sum(xtabs[,2])))
}
spec <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[1,1]/(sum(xtabs[,1])))
}
me <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[2,1]/sum(xtabs[,1]))
}
vme <- function(pred,truth) {
xtabs <- table(pred,factor(truth, levels = 0:1))
return(xtabs[1,2]/sum(xtabs[,2]))
}
metrics <- function(pred,truth) {
xtabs <- table(pred,truth)
return(list(cat=cat_agree(pred,truth),
ppv=ppv(pred,truth),
npv=npv(pred,truth),
sens=sens(pred,truth),
spec=spec(pred,truth),
me=me(pred,truth),
vme=vme(pred,truth),
n=sum(xtabs),
summary=c("cat"=cat_agree(pred,truth),
"sens"=sens(pred,truth),
"spec"=spec(pred,truth),
"me"=me(pred,truth),
"vme"=vme(pred,truth),
"n"=sum(xtabs)
)
)
)
}# prepare matrix for modelling
R_vs_dets <- cipro_antibiogram %>%
select(`GyrA-83F`:`qnrVC6`, aac6, resistant) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude rare variants
R_vs_dets <- R_vs_dets[,colSums(R_vs_dets) >= 20]
# model R vs all common variants (N>=20) = Model 1a
model_R_indiv <- logistf(resistant ~ ., data=R_vs_dets)
# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_R_indiv_aac6Interaction <- logistf(resistant ~ .*`aac(6')-Ib-cr`, data=R_vs_dets)# summarise model output
logreg_model1a <- logreg_details(model_R_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
logreg_model1a$Determinant[logreg_model1a$Determinant == "aac(6')-Ib-cr"] <- "aac(6`)-Ib-cr"
logreg_model1b <- logreg_details(model_R_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
logreg_model1b$Determinant[logreg_model1b$Determinant == "aac(6')-Ib-cr"] <- "aac(6`)-Ib-cr"
# all individual determinants, plus significant positive interactions
log_reg_R_plot <- logreg_model1a %>% bind_rows(logreg_model1b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd, alpha=0.4) +
scale_colour_manual(values=c(a="#f1933b", b="#bd1515")) +
theme_bw() + labs(x="Regression coefficient", col="R Model", linetype="Interaction")
log_reg_R_plot# prepare matrix for modelling
NWT_vs_dets <- cipro_antibiogram %>%
select(`GyrA-83F`:`qnrVC6`, aac6, nonWT_binary) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude genomes with rare variants
NWT_vs_dets <- NWT_vs_dets[,colSums(NWT_vs_dets) >= 20]
# model NWT vs all common variants (N>=20) = Model 3a
model_NWT_indiv <- logistf(nonWT_binary ~ ., data=NWT_vs_dets)
# model NWT vs all common variants (N>=20), each with aac6 interaction = Model 3b
model_NWT_indiv_aac6Interaction <- logistf(nonWT_binary ~ .*`aac(6')-Ib-cr`, data=NWT_vs_dets)# summarise model output
logreg_model3a <- logreg_details(model_NWT_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
logreg_model3a$Determinant[logreg_model3a$Determinant == "aac(6')-Ib-cr"] <- "aac(6`)-Ib-cr"
logreg_model3b <- logreg_details(model_NWT_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
logreg_model3b$Determinant[logreg_model3b$Determinant == "aac(6')-Ib-cr"] <- "aac(6`)-Ib-cr"
# get count per marker to label on the right
marker_count <- cipro_antibiogram %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(`GyrA-83F`:`qnrVC6`, `aac(6')-Ib-cr`) %>%
colSums()
marker_count_order <- logreg_model3a %>% bind_rows(logreg_model3b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
filter(Determinant != "(Intercept)") %>% pull(Determinant) %>% unique() %>% sort()
log_reg_NWT_plot <- logreg_model3a %>% bind_rows(logreg_model3b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd, alpha=0.4) +
scale_colour_manual(values=c(a="#70bfef", b="#2c15ae")) +
theme_bw() + labs(x="Regression coefficient", col="NWT Model", linetype="Interaction", y="") +
scale_y_discrete(labels=paste0("n=",marker_count[marker_count_order]), position="right")
log_reg_NWT_plotmultiVariable_vs_singleVariable <- logreg_model1a %>% select(-c(interaction, model)) %>%
setNames(paste0('R Model1a.', names(.))) %>% rename(Term='R Model1a.Determinant') %>%
full_join(logreg_model1b %>% select(-c(Determinant, interaction, model)) %>% setNames(paste0('R Model1b.', names(.)))%>% rename(Term='R Model1b.Term'), by="Term") %>%
full_join(logreg_model3a %>% select(-c(interaction, model)) %>% setNames(paste0('NWT Model1a.', names(.))) %>% rename(Term='NWT Model1a.Determinant'), by="Term") %>%
full_join(logreg_model3b %>% select(-c(Determinant, interaction, model)) %>% setNames(paste0('NWT Model1b.', names(.)))%>% rename(Term='NWT Model1b.Term'), by="Term")
write_csv(multiVariable_vs_singleVariable, file="tables/TableS7_LogRegCoef.csv")ppv_solo <- read_csv("tables/TableS6_determinantStats.csv") %>% filter(!is.na(N.solo)) %>% select(Determinant, ends_with(".solo"))
ppvR <- ppv_solo %>% mutate(p=R.solo/N.solo, se=sqrt(p*(1-p)/N.solo), ci.lower=p-1.96*se, ci.upper=p+1.96*se) %>% select(Determinant, p, ci.lower, ci.upper) %>% mutate(category="R")
ppvNWT <- ppv_solo %>% mutate(p=NWT.solo/N.solo, se=sqrt(p*(1-p)/N.solo), ci.lower=p-1.96*se, ci.upper=p+1.96*se) %>% select(Determinant, p, ci.lower, ci.upper, N.solo) %>% mutate(category="NWT")
ppvR$Determinant[ppvR$Determinant == "aac(6')-Ib-cr.v2"] <- "aac(6`)-Ib-cr.v2"
ppvR$Determinant[ppvR$Determinant == "aac(6')-Ib-cr.v2*"] <- "aac(6`)-Ib-cr.v2*"
ppvR_plot <- ppvR %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=50, linetype=2) +
geom_linerange(aes(xmin=ci.lower*100, xmax=ci.upper*100), col="#bd1515", position=pd) +
geom_point(aes(x=p*100, group=category), col="#bd1515", position=pd) +
theme_bw() + labs(x="PPV (%)")
ppvNWT$Determinant[ppvNWT$Determinant == "aac(6')-Ib-cr.v2"] <- "aac(6`)-Ib-cr.v2"
ppvNWT$Determinant[ppvNWT$Determinant == "aac(6')-Ib-cr.v2*"] <- "aac(6`)-Ib-cr.v2*"
ppvNWT_plot <- ppvNWT %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=50, linetype=2) +
geom_linerange(aes(xmin=ci.lower*100, xmax=ci.upper*100), col="#2c15ae", position=pd) +
geom_point(aes(x=p*100, group=category), col="#2c15ae", position=pd) +
theme_bw() + labs(x="PPV (%)", y="") +
scale_y_discrete(labels=paste0("n=",ppvNWT$N.solo), position="right")(ppvR_plot + ggtitle(label="a) PPV for solo marker", subtitle="R") + ppvNWT_plot + ggtitle(label="", subtitle="NWT") + plot_layout(guides="collect", axes="collect")) / (log_reg_R_plot + ggtitle(label="b) Logistic regression", subtitle="R") + log_reg_NWT_plot + ggtitle(label="", subtitle="NWT") + plot_layout(guides="collect", axes="collect"))# R models
model_R_indiv_pred <- predict(model_R_indiv, type="response")
model_R_indiv_aac6Interaction_pred <- predict(model_R_indiv_aac6Interaction, type="response")
# NWT models
model_NWT_indiv_pred <- predict(model_NWT_indiv, type="response")
model_NWT_indiv_aac6Interaction_pred <- predict(model_NWT_indiv_aac6Interaction, type="response")# R models
model_R_groups <- logistf(resistant ~ QRDR_mutations + acquired_genes + aac6, data=cipro_antibiogram)
model_R_groups_selectInteract <- logistf(resistant ~ QRDR_mutations*aac6 + acquired_genes*aac6, data=cipro_antibiogram)
# NWT models
model_NWT_groups <- logistf(nonWT_binary ~ QRDR_mutations + acquired_genes + aac6, data=cipro_antibiogram)
model_NWT_groups_selectInteract <- logistf(nonWT_binary ~ QRDR_mutations*aac6 + acquired_genes*aac6, data=cipro_antibiogram)
# R model predictions
model_R_groups_pred <- predict(model_R_groups, type="response")
model_R_groups_selectInteract_pred <- predict(model_R_groups_selectInteract, type="response")
# NWT model predictions
model_NWT_groups_pred <- predict(model_NWT_groups, type="response")
model_NWT_groups_selectInteract_pred <- predict(model_NWT_groups_selectInteract, type="response")count_model_summary <- logreg_details(model_R_groups) %>% mutate(Model="2a", response="R") %>%
bind_rows(logreg_details(model_R_groups_selectInteract) %>% mutate(Model="2b", response="R")) %>%
bind_rows(logreg_details(model_NWT_groups) %>% mutate(Model="2a", response="NWT")) %>%
bind_rows(logreg_details(model_NWT_groups_selectInteract) %>% mutate(Model="2b", response="NWT")) %>%
relocate(Model, .before=Determinant) %>% relocate(response, .before=Model) %>%
write_csv(file="tables/TableS8_CountLogRegCoef.csv")aic <- c(extractAIC(model_R_indiv)[2],
extractAIC(model_R_indiv_aac6Interaction)[2],
extractAIC(model_R_groups)[2],
extractAIC(model_R_groups_selectInteract)[2],
extractAIC(model_NWT_indiv)[2],
extractAIC(model_NWT_indiv_aac6Interaction)[2],
extractAIC(model_NWT_groups)[2],
extractAIC(model_NWT_groups_selectInteract)[2])
anova_result <- c("-",anova(model_R_indiv, model_R_indiv_aac6Interaction)$pval,
"-",anova(model_R_groups, model_R_groups_selectInteract)$pval,
"-",anova(model_NWT_indiv, model_NWT_indiv_aac6Interaction)$pval,
"-", anova(model_NWT_groups, model_NWT_groups_selectInteract)$pval)
metrics_result <- rbind(
metrics(model_R_indiv_pred>0.5, R_vs_dets$resistant)$summary,
metrics(model_R_indiv_aac6Interaction_pred>0.5, R_vs_dets$resistant)$summary,
metrics(model_R_groups_pred>0.5, cipro_antibiogram$resistant)$summary,
metrics(model_R_groups_selectInteract_pred>0.5, R_vs_dets$resistant)$summary,
metrics(model_NWT_indiv_pred>0.5, NWT_vs_dets$nonWT_binary)$summary,
metrics(model_NWT_indiv_aac6Interaction_pred>0.5, NWT_vs_dets$nonWT_binary)$summary,
metrics(model_NWT_groups_pred>0.5, cipro_antibiogram$nonWT_binary)$summary,
metrics(model_NWT_groups_selectInteract_pred>0.5, cipro_antibiogram$nonWT_binary)$summary
)
model_summary_tab <- cbind(aic, anova_result, metrics_result) %>% as_tibble() %>%
rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat) %>%
rename(`model fit (aac6 interaction)`=anova_result) %>%
rename(AIC=aic) %>%
mutate(ModelName=c("Model 1a", "Model 1b", "Model 2a", "Model 2b", "Model 1a", "Model 1b", "Model 2a", "Model 2b")) %>%
mutate(response = c(rep("R",4), rep("NWT",4))) %>%
mutate(predictors = rep(c("determinants", "determinant*aac6", "QRDR count + PMQR count + aac6","QRDR count*aac6 + PMQR count*aac6"),2))model_summary_tab %>%
mutate(Sensitivity=paste0(round(as.numeric(Sensitivity)*100,2),"%")) %>%
mutate(Specificity=paste0(round(as.numeric(Specificity)*100,2),"%")) %>%
mutate(ME=paste0(round(as.numeric(ME)*100,2),"%")) %>%
mutate(VME=paste0(round(as.numeric(VME)*100,2),"%")) %>%
mutate(`Categorical agreement`=paste0(round(as.numeric(`Categorical agreement`)*100,2),"%")) %>%
select(response, ModelName, predictors, `Categorical agreement`, Sensitivity, Specificity, ME, VME, AIC, `model fit (aac6 interaction)`) %>%
write_csv("tables/Table1a_ModelSummary.csv", na="-")model_summary_tab_facet <- model_summary_tab
model_summary_tab_facet <- model_summary_tab_facet %>%
mutate(Sensitivity=round(as.numeric(Sensitivity)*100,2)) %>%
mutate(Specificity=round(as.numeric(Specificity)*100,2)) %>%
mutate(ME=round(as.numeric(ME)*100,2)) %>%
mutate(VME=round(as.numeric(VME)*100,2)) %>%
mutate(`Categorical agreement`=round(as.numeric(`Categorical agreement`)*100,2)) %>%
mutate(AIC=round(as.numeric(AIC),0)) %>%
mutate(Label=paste0(ModelName, ": ", predictors)) %>%
rename(Prediction=response)%>%
select(Label, Prediction, `Categorical agreement`, Sensitivity, Specificity, ME, VME, AIC) %>%
rename(`Major error`=ME, `Very major error`=VME)
model_summary_tab_long <- model_summary_tab_facet %>%
pivot_longer(
`Categorical agreement`:`AIC`,
names_to = "Evaluation metric",
values_to = "value"
)
model_summary_tab_long <- model_summary_tab_long%>%
mutate(`Evaluation metric` = factor(`Evaluation metric`, levels=c("Categorical agreement", "Sensitivity", "Specificity", "Major error", "Very major error", "AIC")))
model_summary_tab_long <- model_summary_tab_long%>%
mutate(Label = factor(Label, levels=c("Model 1a: determinants", "Model 1b: determinant*aac6", "Model 2a: QRDR count + PMQR count + aac6", "Model 2b: QRDR count*aac6 + PMQR count*aac6")))
model_summary_plot <-
model_summary_tab_long %>% ggplot(aes(x=(as.numeric(value)), y=fct_rev(Label)))+
geom_point(aes(colour=Prediction), position=pd) +
geom_text(aes(label=as.numeric(value), colour=Prediction), size=3,position=pd, fontface = "bold", vjust=-0.5, hjust=0.5)+
scale_color_manual(values=c("R"="IndianRed", "NWT"="#78638a")) +
facet_wrap(~ `Evaluation metric`, ncol = 6, scales="free_x")+
theme_linedraw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank())+
facetted_pos_scales(
x = list(`Evaluation metric` == "Categorical agreement" ~ scale_x_continuous(breaks = c(80,100), limits = c(70, 105)),
`Evaluation metric` == "Sensitivity" ~ scale_x_continuous(breaks = c(80,100), limits = c(65, 100)),
`Evaluation metric` == "Specificity" ~ scale_x_continuous(breaks = c(80, 100), limits = c(70, 105)),
`Evaluation metric` == "Major error" ~ scale_x_continuous(breaks = c(0,10), limits = c(0,10)),
`Evaluation metric` == "Very major error"~ scale_x_continuous(breaks = c(10,20), limits = c(0, 32)), `Evaluation metric` == "AIC"~ scale_x_continuous(breaks = c(12000,13000), limits = c(11500, 13800))
))
model_summary_plotcipro_antibiogram <- cipro_antibiogram %>%
mutate(QRDR_mutations_exclude = str_count(Flq_mutations, "GyrA-87G") + str_count(Flq_mutations, "GyrA-87H")) %>%
mutate(PMQR_exclude = str_count(Flq_acquired, "qnrB1.v1") + str_count(Flq_acquired, "qnrB1.v2")) %>%
mutate(rules_category = case_when(
QRDR_mutations==0 & acquired_genes==0 & aac6==0 ~ 0, # no determinants -> WT S
QRDR_mutations==1 & acquired_genes==0 & aac6==0 & QRDR_mutations_exclude==1 ~ 0, # non-associated QRDR solo -> WT S
QRDR_mutations==0 & acquired_genes==0 & aac6==1 ~ 1, # aac6 solo -> WT S
QRDR_mutations==0 & acquired_genes==1 & aac6==0 & PMQR_exclude==1 ~ 2, # non-associated PMQR solo -> NWT I
QRDR_mutations==1 & acquired_genes==0 & aac6==0 & QRDR_mutations_exclude==0 ~ 3, # 1 QRDR, except those not associated -> NWT R
QRDR_mutations==1 & acquired_genes==0 & aac6==1 ~ 4, # 1 QRDR + aac6 -> NWT R
QRDR_mutations>=2 & acquired_genes==0 ~ 5, # ≥2 QRDR -> NWT R
QRDR_mutations==0 & acquired_genes==1 & aac6==0 & PMQR_exclude==0 ~ 6, # 1 PMQR, except those not associated -> NWT R
QRDR_mutations==0 & acquired_genes==1 & aac6==1 ~ 7, # 1 PMQR + aac6
QRDR_mutations==0 & acquired_genes>=2 ~ 8, # ≥2 PMQR -> NWT R
QRDR_mutations>=1 & acquired_genes>=1 ~ 9, # QRDR + PMQR -> NWT R
TRUE ~ NA
))
# check all are assigned
table(cipro_antibiogram$rules_category)##
## 0 1 2 3 4 5 6 7 8 9
## 5680 161 160 103 23 2167 546 819 68 2440
##
## FALSE
## 12167
# assign S/R predictions to categories
cipro_antibiogram <- cipro_antibiogram %>%
mutate(predSIR = if_else(rules_category<=2, "S", "R")) %>%
mutate(predSIR = replace(predSIR, rules_category==2, "I")) %>%
mutate(predSIR = factor(predSIR, levels=c("S", "I", "R"))) %>%
mutate(predR = if_else(rules_category<=2, "S", "R")) %>%
mutate(predR = factor(predR, levels=c("S", "R"))) %>%
mutate(predNWT = if_else(rules_category<2, "WT", "NWT")) %>%
mutate(predNWT = factor(predNWT, levels=c("WT", "NWT")))
table(cipro_antibiogram$rules_category, cipro_antibiogram$predR)##
## S R
## 0 5680 0
## 1 161 0
## 2 160 0
## 3 0 103
## 4 0 23
## 5 0 2167
## 6 0 546
## 7 0 819
## 8 0 68
## 9 0 2440
##
## S I R
## 0 5680 0 0
## 1 161 0 0
## 2 0 160 0
## 3 0 0 103
## 4 0 0 23
## 5 0 0 2167
## 6 0 0 546
## 7 0 0 819
## 8 0 0 68
## 9 0 0 2440
##
## WT NWT
## 0 5680 0
## 1 161 0
## 2 0 160
## 3 0 103
## 4 0 23
## 5 0 2167
## 6 0 546
## 7 0 819
## 8 0 68
## 9 0 2440
species_R <- cipro_antibiogram %>% group_by(species) %>%
filter(species != "Klebsiella variicola subsp. tropica") %>%
filter(species != "Klebsiella quasivariicola") %>%
filter(species != "Klebsiella africana") %>%
summarise(cat=cat_agree(predR, resistant),
sens=sens(predR, resistant),
spec=spec(predR, resistant),
me=me(predR, resistant),
vme=vme(predR, resistant),
n=n())%>%
mutate(out=rep("R", 4)) %>%
relocate(out, .before=species)
species_NWT <- cipro_antibiogram %>% group_by(species) %>%
filter(species != "Klebsiella variicola subsp. tropica") %>%
filter(species != "Klebsiella quasivariicola") %>%
filter(species != "Klebsiella africana") %>%
summarise(cat=cat_agree(predNWT, nonWT_binary),
sens=sens(predNWT, nonWT_binary),
spec=spec(predNWT, nonWT_binary),
me=me(predNWT, nonWT_binary),
vme=vme(predNWT, nonWT_binary),
n=n()) %>%
mutate(out=rep("NWT", 4)) %>%
relocate(out, .before=species)
species_summary_table <-
bind_rows(c("out"="R","species"="Klebsiella pneumoniae species complex",
metrics(cipro_antibiogram$predR, cipro_antibiogram$resistant)$summary),
c("out"="NWT","species"="Klebsiella pneumoniae species complex",
metrics(cipro_antibiogram$predNWT, cipro_antibiogram$nonWT_binary)$summary)
) %>%
mutate_at(c("cat","sens","spec","me","vme", "n"), as.double) %>%
bind_rows(species_R) %>%
bind_rows(species_NWT) %>%
rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat, N=n) %>%
mutate(Sensitivity=paste0(round(as.numeric(Sensitivity)*100,2),"%")) %>%
mutate(Specificity=paste0(round(as.numeric(Specificity)*100,2),"%")) %>%
mutate(ME=paste0(round(as.numeric(ME)*100,2),"%")) %>%
mutate(VME=paste0(round(as.numeric(VME)*100,2),"%")) %>%
mutate(`Categorical agreement`=paste0(round(as.numeric(`Categorical agreement`)*100,2),"%")) %>%
select(out, species, `Categorical agreement`, Sensitivity, Specificity, ME, VME, N)
write_csv(species_summary_table, "tables/Table1b_ModelSummary.csv", na="-")species_summary_table <-
bind_rows(c("out"="R","species"="Klebsiella pneumoniae species complex",
metrics(cipro_antibiogram$predR, cipro_antibiogram$resistant)$summary),
c("out"="NWT","species"="Klebsiella pneumoniae species complex",
metrics(cipro_antibiogram$predNWT, cipro_antibiogram$nonWT_binary)$summary)
) %>%
mutate_at(c("cat","sens","spec","me","vme", "n"), as.double) %>%
bind_rows(species_R) %>%
bind_rows(species_NWT) %>%
rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat, N=n) %>%
mutate(Sensitivity=round(as.numeric(Sensitivity)*100,2)) %>%
mutate(Specificity=round(as.numeric(Specificity)*100,2)) %>%
mutate(ME=round(as.numeric(ME)*100,2)) %>%
mutate(VME=round(as.numeric(VME)*100,2)) %>%
mutate(`Categorical agreement`=round(as.numeric(`Categorical agreement`)*100,2)) %>%
rename(Prediction=out)%>%
mutate(species=gsub("Klebsiella quasipneumoniae subsp. quasipneumoniae","Kq subsp. quasipneumoniae",species)) %>%
mutate(species=gsub("Klebsiella quasipneumoniae subsp. similipneumoniae","Kq subsp. similipneumoniae",species)) %>%
mutate(species=gsub("Klebsiella variicola subsp. variicola","Klebsiella variicola",species)) %>%
select(species, Prediction, `Categorical agreement`, Sensitivity, Specificity, ME, VME) %>%
rename(`Major error`=ME, `Very major error`=VME, Label=species)
species_summary_table_long <- species_summary_table %>%
pivot_longer(
`Categorical agreement`:`Very major error`,
names_to = "Evaluation metric",
values_to = "value"
)
species_summary_table_long <- species_summary_table_long%>%
mutate(`Evaluation metric` = factor(`Evaluation metric`, levels=c("Categorical agreement", "Sensitivity", "Specificity", "Major error", "Very major error")))
species.labels=c("<br>*Klebsiella pnemoniae* species complex (N=12,167)",
"<br>*Klebsiella pneumoniae* (N=10,634)",
"<br>*Kq* subsp. <br>*quasipneumoniae* (N=189)",
"<br>*Kq* subsp. <br>*similipneumoniae* (N=286)",
"<br>*Klebsiella variicola* (N=1,037)")
species_summary_table_long <- species_summary_table_long%>%
mutate(Label = factor(Label, levels=c("Klebsiella pneumoniae species complex", "Klebsiella pneumoniae", "Kq subsp. quasipneumoniae", "Kq subsp. similipneumoniae", "Klebsiella variicola")))
rules_species_summary_plot <-
species_summary_table_long %>% ggplot(aes(x=(as.numeric(value)), y=fct_rev(Label)))+
geom_point(aes(colour=Prediction), position=pd) +
geom_text(aes(label=as.numeric(value), colour=Prediction), size=3,position=pd, fontface = "bold", vjust=-0.5, hjust=0.5)+
scale_color_manual(values=c("R"="IndianRed", "NWT"="#78638a")) +
scale_y_discrete(labels=rev(species.labels)) +
facet_wrap(~ `Evaluation metric`, ncol = 5, scales="free_x")+
theme_linedraw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_markdown())+
facetted_pos_scales(
x = list(`Evaluation metric` == "Categorical agreement" ~ scale_x_continuous(breaks = c(80,100), limits = c(70, 105)),
`Evaluation metric` == "Sensitivity" ~ scale_x_continuous(breaks = c(80,100), limits = c(65, 100)),
`Evaluation metric` == "Specificity" ~ scale_x_continuous(breaks = c(80, 100), limits = c(70, 105)),
`Evaluation metric` == "Major error" ~ scale_x_continuous(breaks = c(0,10), limits = c(0,10)),
`Evaluation metric` == "Very major error"~ scale_x_continuous(breaks = c(10,20), limits = c(0, 32))
))
rules_species_summary_plot((model_summary_plot + ggtitle(label="a) Logistic regression models")) / (rules_species_summary_plot + ggtitle(label="b) Rules-based classifiers")) + plot_layout(guides="collect", axes="collect"))
#ggsave(height=8, width=12, file="figs/LRmodel_classifier_performance.png")
#ggsave(height=8, width=12, file="figs/LRmodel_classifier_performance.pdf")species_summary_table_long$Category <- "Rules-based classifiers"
model_summary_tab_long$Category <- "Logistic regression models"
performance_summary <- rbind(species_summary_table_long, model_summary_tab_long)
performance_summary <- performance_summary%>%
mutate(`Evaluation metric` = factor(`Evaluation metric`, levels=c("Categorical agreement", "Sensitivity", "Specificity", "Major error", "Very major error", "AIC")))
performance_summary <- performance_summary%>%
mutate(Label = factor(Label, levels=c("Model 1a: determinants",
"Model 1b: determinant*aac6",
"Model 2a: QRDR count + PMQR count + aac6",
"Model 2b: QRDR count*aac6 + PMQR count*aac6","Klebsiella pneumoniae species complex", "Klebsiella pneumoniae", "Kq subsp. quasipneumoniae", "Kq subsp. similipneumoniae", "Klebsiella variicola")))
performance_summary <- performance_summary%>%
mutate(Category = factor(Category, levels=c("Logistic regression models", "Rules-based classifiers")))
y_labels=c("Model 1a: determinants"="Model 1a: determinants",
"Model 1b: determinant*aac6"="Model 1b: determinant*aac6",
"Model 2a: QRDR count + PMQR count + aac6" ="Model 2a: QRDR count + PMQR count + aac6",
"Model 2b: QRDR count*aac6 + PMQR count*aac6"="Model 2b: QRDR count*aac6 + PMQR count*aac6",
"Klebsiella pneumoniae species complex"="<br>*Klebsiella pnemoniae* species complex (N=12,167)",
"Klebsiella pneumoniae"="<br>*Klebsiella pneumoniae* (N=10,634)",
"Kq subsp. quasipneumoniae"="<br>*Kq* subsp. <br>*quasipneumoniae* (N=189)",
"Kq subsp. similipneumoniae"="<br>*Kq* subsp. <br>*similipneumoniae* (N=286)",
"Klebsiella variicola"="<br>*Klebsiella variicola* (N=1,037)")
performance_summary_plot <-
performance_summary %>% ggplot(aes(x=(as.numeric(value)), y=fct_rev(Label)))+
geom_point(aes(colour=Prediction), position=pd) +
geom_text(aes(label=as.numeric(value), colour=Prediction), size=3,position=pd, fontface = "bold", vjust=-0.5, hjust=0.5)+
scale_color_manual(values=c("R"="IndianRed", "NWT"="#78638a")) +
facet_grid(rows=vars(Category), cols=vars(`Evaluation metric`), scales="free", drop=FALSE)+
scale_y_discrete(labels=(y_labels)) +
theme_linedraw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_markdown())+
facetted_pos_scales(
x = list(`Evaluation metric` == "Categorical agreement" ~ scale_x_continuous(breaks = c(80,100), limits = c(70, 105)),
`Evaluation metric` == "Sensitivity" ~ scale_x_continuous(breaks = c(80,100), limits = c(65, 100)),
`Evaluation metric` == "Specificity" ~ scale_x_continuous(breaks = c(80, 100), limits = c(70, 105)),
`Evaluation metric` == "Major error" ~ scale_x_continuous(breaks = c(0,10), limits = c(0,10)),
`Evaluation metric` == "Very major error"~ scale_x_continuous(breaks = c(10,20), limits = c(0, 32)), `Evaluation metric` == "AIC"~ scale_x_continuous(breaks = c(12000,13000), limits = c(11500, 13800))
))
#ggsave(height=6, width=12, file="figs/LRmodel_classifier_performance_aligned.png")
#ggsave(height=6, width=12, file="figs/LRmodel_classifier_performance_aligned.pdf")
performance_summary_plot <-
performance_summary %>% ggplot(aes(x=(as.numeric(value)), y=fct_rev(Label)))+
# geom_point(aes(colour=Prediction), position=pd) +
geom_text(aes(label=as.numeric(value), colour=Prediction), size=3,position=pd, fontface = "bold")+
scale_color_manual(values=c("R"="IndianRed", "NWT"="#78638a")) +
facet_grid(rows=vars(Category), cols=vars(`Evaluation metric`), scales="free", drop=FALSE)+
scale_y_discrete(labels=(y_labels)) +
theme_linedraw() +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_markdown())+
facetted_pos_scales(
x = list(`Evaluation metric` == "Categorical agreement" ~ scale_x_continuous(breaks = c(80,100), limits = c(70, 105)),
`Evaluation metric` == "Sensitivity" ~ scale_x_continuous(breaks = c(80,100), limits = c(65, 100)),
`Evaluation metric` == "Specificity" ~ scale_x_continuous(breaks = c(80, 100), limits = c(70, 105)),
`Evaluation metric` == "Major error" ~ scale_x_continuous(breaks = c(0,10), limits = c(0,10)),
`Evaluation metric` == "Very major error"~ scale_x_continuous(breaks = c(10,20), limits = c(0, 32)), `Evaluation metric` == "AIC"~ scale_x_continuous(breaks = c(12000,13000), limits = c(11500, 13800))
))
#ggsave(height=6, width=12, file="figs/LRmodel_classifier_performance_aligned_number.png")
#ggsave(height=6, width=12, file="figs/LRmodel_classifier_performance_aligned_number.pdf")ST_R <- cipro_antibiogram %>% group_by(ST) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="ST") %>%
relocate(out, category, .before=ST)%>%
rename(category_name=ST)Country_R <- cipro_antibiogram %>% group_by(Country) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Country") %>%
relocate(out, category, .before=Country)%>%
rename(category_name=Country)SourceType_R <- cipro_antibiogram %>% group_by(Source.Type) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Source") %>%
relocate(out, category, .before=Source.Type)%>%
rename(category_name=Source.Type)cipro_antibiogram$Infection.status <- cipro_antibiogram$Infection.status %>% replace_na("Unknown")
InfectionStatus_R <- cipro_antibiogram %>% group_by(Infection.status) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Inf") %>%
relocate(out, category, .before=Infection.status) %>%
rename(category_name=Infection.status)cipro_antibiogram <- cipro_antibiogram %>%
mutate(platform_type=case_when(
Laboratory.Typing.Platform=="Sensititre" ~ "Auto",
Laboratory.Typing.Platform=="Phoenix" ~ "Auto",
Laboratory.Typing.Platform=="Vitek" ~ "Auto",
Laboratory.Typing.Platform=="Manual" ~ "Manual",
TRUE ~ "Unknown"
))
platform_R <- cipro_antibiogram %>%
group_by(platform_type) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Plat") %>%
relocate(out, category, .before=platform_type)%>%
rename(category_name=platform_type)
platform_RCombined_R <- bind_rows(ST_R, Country_R, SourceType_R, InfectionStatus_R, platform_R)
Combined_R <- Combined_R %>%
rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat, N=n, `Resistant`=percent_R, Category=category, Prediction=out, `Category Name`=category_name)
#%>%
# write_csv("tables/TableSX_ModelSummary_Combined.csv", na="-")
#filter for categories: susceptible genomes >10, more than 25 genomes, more than 2 datasets
Combined_R_100 <- Combined_R %>%
filter(susceptible>10) %>%
filter(N>25) %>%
filter(n_dataset >=3) %>%
mutate(Category_label=paste(`Category Name`, " (n=", N,", S=", susceptible, ")", sep = "")) %>%
select(Category, Category_label, N, Sensitivity, Specificity, `Resistant`)
# convert table from wide to long for plotting
Combined_R_100_long<-gather(Combined_R_100, `Evaluation metric`, value, Sensitivity:`Resistant`)
Combined_R_100_long <- Combined_R_100_long[order(Combined_R_100_long$N, decreasing = TRUE),]
Combined_R_100_plot <-ggplot(Combined_R_100_long, aes(as.numeric(value),fct_inorder(as.factor(Category_label)))) +
geom_point(aes(shape = `Evaluation metric`, size=`Evaluation metric`, colour=`Evaluation metric`), alpha=0.5) +
scale_shape_manual(values = c(Sensitivity=15, Specificity=18, Resistant=3)) +
scale_size_manual(values = c(Sensitivity=3, Specificity=3, Resistant=1)) +
scale_colour_manual(values=c(Sensitivity="#8eb567", Specificity="#9a98ea", Resistant="IndianRed")) +
scale_y_discrete(limits=rev)+
facet_grid(Category~., scales = "free", space='free')+
labs(y="", x = "%") +
theme_bw()+
theme(legend.title=element_blank())
Combined_R_100_plotStudy_R <- cipro_antibiogram %>% group_by(index) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="index") %>%
relocate(out, category, .before=index)%>%
rename(category_name=index)platform_R <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method!="Disk diffusion") %>%
group_by(Laboratory.Typing.Platform) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Laboratory.Typing.Platform") %>%
relocate(out, category, .before=Laboratory.Typing.Platform)%>%
rename(category_name=Laboratory.Typing.Platform)
platform_Rcipro_antibiogram %>%
filter(Laboratory.Typing.Method!="Disk diffusion") %>%
group_by(Laboratory.Typing.Platform) %>% summarise(studies=length(unique(index)))platform_disk <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method=="Disk diffusion") %>%
group_by(Laboratory.Typing.Platform) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Laboratory.Typing.Platform") %>%
relocate(out, category, .before=Laboratory.Typing.Platform)%>%
rename(category_name=Laboratory.Typing.Platform)
cipro_antibiogram %>%
filter(Laboratory.Typing.Method=="Disk diffusion") %>%
group_by(Laboratory.Typing.Platform) %>% summarise(studies=length(unique(index)))method_R <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Laboratory.Typing.Method") %>%
relocate(out, category, .before=Laboratory.Typing.Method)%>%
rename(category_name=Laboratory.Typing.Method)cipro_antibiogram <- cipro_antibiogram %>%
mutate(correctR=case_when(predR=="R" & resistant==1 ~ 1, predR=="S" & resistant==0 ~ 1, TRUE ~ 0)) %>%
mutate(method=if_else(Laboratory.Typing.Method=="Disk diffusion", "DD", "MIC"))
cipro_antibiogram %>% group_by(correctR, predR, resistant) %>% count()study_method <- logistf(correctR ~ method + index + Source.Type + Infection.status, data=cipro_antibiogram)
summary(study_method)## logistf(formula = correctR ~ method + index + Source.Type + Infection.status,
## data = cipro_antibiogram)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 6.9865847 1.7046310 3.9096880 12.03251469
## methodMIC 0.5016894 0.8385742 -0.8326511 2.69159454
## indexAlfred_KASPAH -2.1309202 1.4310942 -6.9772376 -0.14785843
## indexbachman_2022 -1.7131154 1.5196149 -6.6210465 0.64534725
## indexColombia_GHRU -1.5317296 1.4450223 -6.3877366 0.50781145
## indexDenmark_MVK_MIC -3.3714542 2.0159048 -8.7053054 1.94365082
## indexEgli_MIC -2.8695316 1.4626052 -7.7384046 -0.76855869
## indexHeinz_2019 -3.1231309 1.4337668 -7.9712676 -1.13004709
## indexHuynh_2020 -1.7966687 1.6837407 -6.8221852 1.24075376
## indexIndia_GHRU -1.9005228 1.4273573 -6.7441432 0.06721892
## indexItaly_MVK_MIC -3.4431591 1.6767683 -8.4839523 -0.29466474
## indexKLEBGAP -0.7194896 1.6396743 -5.7088519 2.22703459
## indexLeangapichart_2021 -4.8641676 1.4799242 -9.7459112 -2.70095579
## indexLong -1.0607079 1.4286427 -5.9052292 0.91225872
## indexmathers_virginia -2.0881395 1.5607964 -7.0246829 0.45268294
## indexmilan_sanraffaele_2019 -1.4738325 1.5381238 -6.3959031 0.93744601
## indexNigeria_GHRU -2.1150901 1.4747869 -6.9919351 0.04734375
## indexParkhill -2.4120290 1.4377247 -7.2629344 -0.40265829
## indexperdigao_2022 -2.5210184 1.6557482 -7.5235196 0.45771860
## indexPhilippines_DD -1.4627405 1.8490809 -6.6173829 2.04998886
## indexPhilippines_MIC -2.7095832 1.4348649 -7.5584846 -0.71187912
## indexPotter_2018 -0.8353447 1.8444929 -5.9857567 2.66884318
## indexsands2021_arthropods_DD -4.7619157 1.7493157 -9.8458765 -1.58219549
## indexsands2021_BARNARDS -3.1291819 1.4292385 -7.9741317 -1.15412571
## indexSPARK -1.9578723 1.4274686 -6.8016555 0.01050652
## indexStoesser_2013 -2.1951780 1.4996278 -7.0893615 0.07247002
## indexTorok_Vietnam_2020 -0.6662636 1.4489949 -5.5253071 1.38974708
## indexWright -1.4263428 1.5569627 -6.3599058 1.10501373
## Source.TypeEnvironment -0.5035684 0.7547850 -1.9275261 1.26440455
## Source.TypeFood/Feed 1.3616570 1.4956291 -1.6146043 6.36189136
## Source.TypeHuman -2.0098800 0.4231111 -2.9206741 -1.22099454
## Source.TypeMarine -0.3815111 1.4768824 -2.5442699 4.49734855
## Infection.statusInfection -0.6052434 0.2090200 -1.0302855 -0.20109756
## Infection.statusUnknown -0.6981983 0.2907305 -1.2868904 -0.13502740
## Chisq p method
## (Intercept) 14.89039480 1.139417e-04 2
## methodMIC 0.41719000 5.183425e-01 2
## indexAlfred_KASPAH 4.68568277 3.041497e-02 2
## indexbachman_2022 1.84418766 1.744609e-01 2
## indexColombia_GHRU 1.83694586 1.753093e-01 2
## indexDenmark_MVK_MIC 1.96677446 1.607907e-01 2
## indexEgli_MIC 8.78645394 3.034756e-03 2
## indexHeinz_2019 13.99915605 1.828927e-04 2
## indexHuynh_2020 1.34713007 2.457805e-01 2
## indexIndia_GHRU 3.49178199 6.167416e-02 2
## indexItaly_MVK_MIC 4.52106847 3.347992e-02 2
## indexKLEBGAP 0.21310618 6.443436e-01 2
## indexLeangapichart_2021 30.91042199 2.702168e-08 2
## indexLong 0.79006108 3.740817e-01 2
## indexmathers_virginia 2.51288529 1.129190e-01 2
## indexmilan_sanraffaele_2019 1.25147649 2.632707e-01 2
## indexNigeria_GHRU 3.63548722 5.656005e-02 2
## indexParkhill 6.35210938 1.172418e-02 2
## indexperdigao_2022 2.78741429 9.500742e-02 2
## indexPhilippines_DD 0.67842169 4.101307e-01 2
## indexPhilippines_MIC 9.06850672 2.600493e-03 2
## indexPotter_2018 0.21729844 6.411061e-01 2
## indexsands2021_arthropods_DD 7.98226733 4.723774e-03 2
## indexsands2021_BARNARDS 14.83254190 1.174906e-04 2
## indexSPARK 3.78518868 5.170808e-02 2
## indexStoesser_2013 3.55617066 5.932445e-02 2
## indexTorok_Vietnam_2020 0.25929116 6.106076e-01 2
## indexWright 1.09418597 2.955456e-01 2
## Source.TypeEnvironment 0.38310890 5.359441e-01 2
## Source.TypeFood/Feed 0.79317450 3.731419e-01 2
## Source.TypeHuman 30.40533302 3.505631e-08 2
## Source.TypeMarine 0.05996965 8.065439e-01 2
## Infection.statusInfection 8.78395136 3.038922e-03 2
## Infection.statusUnknown 5.95511390 1.467465e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=400.0552 on 33 df, p=0, n=12167
## Wald test = 3879.028 on 33 df, p = 0
category_phenotype_table <- cipro_antibiogram %>%
group_by(rules_category, resistant) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=resistant, values_from=n, values_fill=0) %>%
rename(SI=`0`, R=`1`)
category_phenotype_table <- cipro_antibiogram %>%
group_by(rules_category, WT_vs_nonWT) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=WT_vs_nonWT, values_from=n, values_fill=0) %>%
rename(NWT=`non-wt (I/R)`, WT=`wt (S)`) %>%
full_join(category_phenotype_table)
category_phenotype_table <- category_phenotype_table %>%
mutate(N=NWT+WT) %>%
mutate(`R PPV`=paste0(round(R/N*100,2), "% (N=", R,"/",N,")")) %>%
mutate(`NWT PPV`=paste0(round(NWT/N*100,2), "% (N=", NWT,"/",N,")"))
category_phenotype_tablepredR_summary <- cipro_antibiogram %>%
group_by(predR, resistant) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=resistant, values_from=n, values_fill=0) %>%
rename(SI=`0`, R=`1`) %>%
mutate(N=SI+R) %>%
mutate(PPV = R/N)
predNWT_summary <- cipro_antibiogram %>%
group_by(predNWT, WT_vs_nonWT) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=WT_vs_nonWT, values_from=n, values_fill=0) %>%
mutate(N=`non-wt (I/R)`+`wt (S)`) %>%
mutate(PPV = `non-wt (I/R)`/N)
category_phenotype_table %>%
mutate(QRDR=c("0^", "0", "0", "1", "1", ">1", "0", "0", "0", ">0")) %>%
mutate(PMQR=c("0", "0", "qnrB1", "0", "0", "0", "1^", "1", ">1", ">0")) %>%
mutate(aac6=c("0", "1", "0", "0", "1", "*", "0", "1", "*", "*")) %>%
mutate(Prediction=c("WT S", "WT S", "NWT I", rep("NWT R", 7))) %>%
mutate(N=as.character(N)) %>%
select(QRDR, PMQR, aac6, Prediction, N, `R PPV`, `NWT PPV`) %>%
bind_rows(c(QRDR="Overall", PMQR="", aac6="", Prediction="-", N=as.character(nrow(cipro_antibiogram)),
`R PPV`=paste0(round(predR_summary[2, "PPV"]*100,2),"% (N=", predR_summary[2,"R"], "/", predR_summary[2,"N"],")"),
`NWT PPV`=paste0(round(predNWT_summary[2, "PPV"]*100,2),"% (N=", predNWT_summary[2,"non-wt (I/R)"], "/", predNWT_summary[2,"N"],")")
)) %>%
write_csv(file="tables/Table2_Classifier.csv")
write(x="\n^=GyrA-87G and GyrA-87H are not included in QRDR count; qnrB1 is excluded from the single-PMQR count. *aac6 presence is not considered", file="tables/Table2_Classifier.csv", append=T)# MIC distribution R vs. S
rules_MIC_distributionRS <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
mutate(predSIR_label=case_when(predSIR=="S" ~ "WT S", predSIR=="R" ~ "NWT R", TRUE ~ "NWT I")) %>%
mutate(predSIR_label=factor(predSIR_label, levels=c("WT S", "NWT I", "NWT R"))) %>%
ggplot(aes(as.factor(round(as.numeric(Measurement), digits=2)), fill = predSIR_label)) + geom_bar() +
labs(title = "A) MIC, by classifer prediction", y = "Number of genomes", x="MIC (mg/L)",
linetype = "MIC breakpoints", fill="Classifier prediction") +
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
scale_fill_manual(values = res_colours3)+
geom_vline(aes(xintercept = 5.5), color = "black") +
annotate("text", x=5.75, y=2900, label="R", size=2.5)+
geom_vline(aes(xintercept = 4.5), color = "black") +
annotate("text", x=4.25, y=2900, label="S", size=2.5)+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=11))
rules_MIC_distributionRS# DD distribution
rules_DD_distributionRS <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
mutate(predSIR_label=case_when(predSIR=="S" ~ "WT S", predSIR=="R" ~ "NWT R", TRUE ~ "NWT I")) %>%
mutate(predSIR_label=factor(predSIR_label, levels=c("WT S", "NWT I", "NWT R"))) %>%
ggplot(aes(as.numeric(Measurement), fill = predSIR_label)) + geom_bar() +
labs(title = "B) Disk zones, by classifer prediction", y = "Number of genomes", x ="Disk zone (mm)",
linetype = "Disk diffusion \nbreakpoints") +
scale_fill_manual(values = res_colours3)+
geom_vline(aes(xintercept = 21.5), color = "black") +
annotate("text", x=20.5, y=400, label="R", size=2.5)+
geom_vline(aes(xintercept = 24.5), color = "black") +
#geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
annotate("text", x=25.5, y=400, label="S", size=2.5)+
theme_minimal() +
theme(axis.text.x = element_text(size=11))+
theme(legend.position = "none")
rules_DD_distributionRSprofile_labels <- paste(
paste0(c("0^", "0", "0", "1", "1", ">1", "0", "0", "0", ">0"), rep(" QRDR,", 10)),
c("0 PMQR,", "0 PMQR,", "qnrB1,", "0 PMQR,", "0 PMQR,", "0 PMQR", "1 PMQR^,", "1 PMQR,", ">1 PMQR", ">0 PMQR"),
c("aac6-", "aac6+", "aac6-", "aac6-", "aac6+", "", "aac6-", "aac6+", "", "")
)
names(profile_labels)<-0:9
profile_colours <- c("0" = "#9DDFFE", "1"="#1A83B6", "2"="#98d3a4", "3"="#fbe8a5", "4"="#ecb16f", "5"="#f17c1b", "6"="#fdc0c0", "7"="#e88f8f", "8"="#b92020", "9"="#59124b")rules_category_SIR_plot <- cipro_antibiogram %>%
ggplot(aes(x=as.factor(rules_category), fill=factor(Resistance.phenotype, levels = c("S", "I", "R")))) +
geom_bar(position = "fill") + coord_flip() +
scale_y_continuous(labels = scales::percent)+
labs(title="C) Observed phenotype distribution, per genotype profile", x="Genotype profile", y="Proportion (%)", fill="Observed phenotype") +
scale_fill_manual(values = res_colours)+
scale_x_discrete(limits=rev, labels=profile_labels) +
geom_text_repel(aes(label = after_stat(count)), stat = "count", size=3, direction="x", position=position_fill(0.4)) +
theme_minimal()
rules_category_SIR_plotrules_MIC_distribution <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
ggplot(aes(x = as.factor(round(as.numeric(Measurement), digits=2)), fill = as.factor(rules_category))) + geom_bar() +
labs(x = "MIC (mg/L)", y = "Number of genomes",
linetype = "MIC breakpoints", title="A) MIC, by genotype profile", fill="Genotype profile") +
scale_fill_manual(values=profile_colours, labels=profile_labels)+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 5.5), color = "black") +
annotate("text", x=5.75, y=2900, label="R", size=2.5)+
geom_vline(aes(xintercept = 4.5), color = "black") +
annotate("text", x=4.25, y=2900, label="S", size=2.5)+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=11))
rules_MIC_distribution# DD distribution
rules_DD_distribution <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
ggplot(aes(as.numeric(Measurement), fill = as.factor(rules_category))) + geom_bar() +
scale_fill_manual(values=profile_colours, labels=profile_labels)+
geom_vline(aes(xintercept = 21.5), color = "black") +
annotate("text", x=20.5, y=400, label="R", size=2.5)+
geom_vline(aes(xintercept = 24.5), color = "black", linetype=2) +
geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
annotate("text", x=26.5, y=400, label="S", size=2.5)+
theme_minimal()+
labs(y = "Number of genomes", x="Disk zone (mm)", title="B) Disk zones, by genotype profile") +
theme_minimal() +
theme(axis.text.x = element_text(size=11))+
theme(legend.position = "none")dd_atu <- cipro_antibiogram %>% filter(Resistance.phenotype!="R" & predSIR=="R") %>% filter(Laboratory.Typing.Method == "Disk diffusion") %>% filter(as.numeric(Measurement) >21 & as.numeric(Measurement)<24)
mic_atu <- cipro_antibiogram %>% filter(Resistance.phenotype!="R" & predSIR=="R") %>% filter(Laboratory.Typing.Method != "Disk diffusion") %>% filter(as.numeric(Measurement) ==0.5)
atu <- bind_rows(mic_atu, mic_atu)
atu %>% group_by(Flq_acquired, Flq_mutations) %>% count() %>% mutate(pc=n/nrow(atu))## [1] "Alfred_KASPAH" "KLEBGAP" "India_GHRU"
## [4] "Leangapichart_2021" "Philippines_MIC" "Egli_MIC"
## [7] "Long" "mathers_virginia" "Italy_MVK_MIC"
## [10] "Parkhill" "milan_sanraffaele_2019" "Colombia_GHRU"
## [13] "sands2021_BARNARDS" "SPARK" "Nigeria_GHRU"
## [16] "Stoesser_2013" "Heinz_2019" "bachman_2022"
## [1] "Klebsiella pneumoniae"
## [2] "Klebsiella quasipneumoniae subsp. quasipneumoniae"
## [3] "Klebsiella quasipneumoniae subsp. similipneumoniae"
# number R with no res determinants
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow()## [1] 140
## [1] 6177
# assay measures for unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(Laboratory.Typing.Method!="Disk diffusion" & Measurement==1) %>% nrow()## [1] 53
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(Laboratory.Typing.Method=="Disk diffusion" & Measurement %in% c(20, 21)) %>% nrow()## [1] 20
# STs for unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(ST) %>% count()cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(ST) %>% count() %>% nrow()## [1] 84
# porin mutations in all genomes without QRDR/PMQR/aac6
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0& aac6==0 & Omp_mutations !="-") %>% nrow()## [1] 94
## [1] 5671
(cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0& aac6==0 & Omp_mutations !="-") %>% nrow()) / (cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow())## [1] 0.01657556
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(Omp_mutations) %>% count()cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(grepl('OmpK35', Omp_mutations))%>% nrow()## [1] 45
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(grepl('OmpK36', Omp_mutations))%>% nrow()## [1] 53
# genomes without QRDR/PMQR/aac6
noResDet <- cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% mutate(ompK35=if_else(grepl('OmpK35', Omp_mutations), 1, 0)) %>% mutate(ompK36=if_else(grepl('OmpK36', Omp_mutations), 1, 0))
fisher.test(noResDet$ompK35, noResDet$resistant)##
## Fisher's Exact Test for Count Data
##
## data: noResDet$ompK35 and noResDet$resistant
## p-value = 6.934e-09
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 6.15008 28.58310
## sample estimates:
## odds ratio
## 13.761
##
## 2-sample test for equality of proportions with continuity correction
##
## data: table(noResDet$ompK35, noResDet$resistant)[2:1, 2:1]
## X-squared = 82.013, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.08469031 0.35834006
## sample estimates:
## prop 1 prop 2
## 0.24444444 0.02292926
##
## Fisher's Exact Test for Count Data
##
## data: noResDet$ompK36 and noResDet$resistant
## p-value = 0.04102
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.8500308 9.1624131
## sample estimates:
## odds ratio
## 3.289251
##
## 2-sample test for equality of proportions with continuity correction
##
## data: table(noResDet$ompK36, noResDet$resistant)[2:1, 2:1]
## X-squared = 3.7993, df = 1, p-value = 0.05127
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.02948782 0.13201541
## sample estimates:
## prop 1 prop 2
## 0.0754717 0.0242079
# porin mutations in unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% filter(Omp_mutations!="-") %>% nrow()## [1] 14
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% filter(Omp_mutations!="-") %>% nrow()/ (cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow())## [1] 0.1
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% group_by(Omp_mutations) %>% count()# assay measures for unexplained R with porin mutations
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% group_by(Omp_mutations, Measurement, Measurement.units) %>% count() %>% filter(Omp_mutations!="-") %>% arrange(Measurement)# number S with mutations
S_mut <- cipro_antibiogram %>% filter(Resistance.phenotype=="S" & ((QRDR_mutations==1 & Flq_mutations!="GyrA-87G"& Flq_mutations!="GyrA-87H") | (acquired_genes==1 & !grepl('qnrB1.', Flq_acquired))))
S_mut %>% nrow()## [1] 41
# assay measures for unexplained R
S_mut %>% filter(Laboratory.Typing.Method!="Disk diffusion" & Measurement==0.25) %>% nrow()## [1] 30
S_mut %>% filter(Laboratory.Typing.Method=="Disk diffusion" & Measurement %in% c(25, 26)) %>% nrow()## [1] 4
## [1] 33
# assay measures for unexplained R with porin mutations
S_mut %>% group_by(Flq_acquired, Flq_mutations, Measurement, Measurement.units) %>% count() %>% arrange(Measurement)MIC_vs_dets <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method!="Disk diffusion") %>%
select(`GyrA-83F`:`qnrVC6`, aac6, Measurement) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude genomes with rare variants
MIC_vs_dets <- MIC_vs_dets[,colSums(MIC_vs_dets) >= 20]
# model MIC vs all common variants (N>=20) = Model 1a
model_MIC_indiv <- lm(log2(Measurement) ~ ., data=MIC_vs_dets)
# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_MIC_indiv_aac6Interaction <- lm(log2(Measurement) ~ .*`aac(6')-Ib-cr`, data=MIC_vs_dets)# summarise model output
linreg_MIC_model1a <- linreg_details(model_MIC_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
linreg_MIC_model1b <- linreg_details(model_MIC_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
# all individual determinants, plus significant positive interactions
lin_reg_MIC_plot <- linreg_MIC_model1a %>% bind_rows(linreg_MIC_model1b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd) +
scale_colour_manual(values=c(a="grey", b="black")) +
theme_bw() + labs(x="Regression coefficient", col="Model", linetype="Interaction")
lin_reg_MIC_plotDD_vs_dets <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method=="Disk diffusion") %>%
select(`GyrA-83F`:`qnrVC6`, aac6, Measurement) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude genomes with rare variants
DD_vs_dets <- DD_vs_dets[,colSums(DD_vs_dets) >= 20]
# model MIC vs all common variants (N>=20) = Model 1a
model_DD_indiv <- lm(Measurement ~ ., data=DD_vs_dets)
# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_DD_indiv_aac6Interaction <- lm(Measurement ~ .*`aac(6')-Ib-cr`, data=DD_vs_dets)# summarise model output
linreg_DD_model1a <- linreg_details(model_DD_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
linreg_DD_model1b <- linreg_details(model_DD_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
# all individual determinants, plus significant positive interactions
lin_reg_DD_plot <- linreg_DD_model1a %>% bind_rows(linreg_DD_model1b) %>%
filter((pval<0.05 & est<0) | is.na(interaction)) %>%
mutate(interaction=replace(interaction, is.na(interaction), "-")) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd) +
scale_colour_manual(values=c(a="grey", b="black")) +
theme_bw() + labs(x="Regression coefficient", col="Model", linetype="Interaction") +
theme(legend.position="none")
lin_reg_DD_plotlin_reg_MIC_plot + ggtitle("MIC model") + lin_reg_DD_plot + ggtitle("Disk diffusion zone model") + plot_layout(axes="collect", guides="collect")
#ggsave(width=8, height=5, file="figs/FigX_regressionMIC_DD_Models.pdf")
#ggsave(width=8, height=5, file="figs/FigX_regressionMIC_DD_Models.png")cipro_antibiogram %>% filter(Laboratory.Typing.Method!="Disk diffusion") %>% ggplot(aes(x=factor(rules_category),y=Measurement)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3))rules_MIC_distribution_separate <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
mutate(profiles=case_when(rules_category==0 ~ profile_labels["0"],
rules_category==1 ~ profile_labels["1"],
rules_category==2 ~ profile_labels["2"],
rules_category==3 ~ profile_labels["3"],
rules_category==4 ~ profile_labels["4"],
rules_category==5 ~ profile_labels["5"],
rules_category==6 ~ profile_labels["6"],
rules_category==7 ~ profile_labels["7"],
rules_category==8 ~ profile_labels["8"],
rules_category==9 ~ profile_labels["9"],
TRUE ~ NA
)) %>%
mutate(profiles=fct_relevel(profiles, profile_labels)) %>%
ggplot(aes(x = as.factor(round(as.numeric(Measurement), digits=2)),
fill = profiles)) +
geom_bar() +
labs(x = "MIC (mg/L)", y = "Number of genomes") +
scale_fill_manual(values=unname(profile_colours))+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 5.5), color = "black") +
geom_vline(aes(xintercept = 4.5), color = "black") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=8),
axis.text.y = element_text(size=7),
legend.position="none") +
facet_wrap(~profiles, scales="free_y", ncol=1)
rules_MIC_distribution_separatediscrete_quantile <- function(x, p) {
sort(x)[length(x)*p]
}
cipro_antibiogram <- cipro_antibiogram %>%
mutate(profiles=case_when(rules_category==0 ~ profile_labels["0"],
rules_category==1 ~ profile_labels["1"],
rules_category==2 ~ profile_labels["2"],
rules_category==3 ~ profile_labels["3"],
rules_category==4 ~ profile_labels["4"],
rules_category==5 ~ profile_labels["5"],
rules_category==6 ~ profile_labels["6"],
rules_category==7 ~ profile_labels["7"],
rules_category==8 ~ profile_labels["8"],
rules_category==9 ~ profile_labels["9"],
TRUE ~ NA
)) %>%
mutate(profiles=fct_relevel(profiles, profile_labels))
MIC_pred <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
group_by(profiles) %>%
summarise(median=median(Measurement),
lower25=discrete_quantile(as.numeric(Measurement), p=0.25),
upper25=discrete_quantile(as.numeric(Measurement), p=0.75)) %>%
mutate(IQR=paste0(median, " [", lower25, "-", upper25, "]"))
MIC_predrules_MIC_distribution_separate <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
left_join(MIC_pred) %>%
mutate(y=case_when(rules_category==0 ~ 1000,
rules_category==1 ~ 50,
rules_category==2 ~ 25,
rules_category==3 ~ 15,
rules_category==4 ~ 4,
rules_category==5 ~ 500,
rules_category==6 ~ 100,
rules_category==7 ~ 200,
rules_category==8 ~ 10,
rules_category==9 ~ 1000,
TRUE ~ NA
)) %>%
ggplot(aes(x = as.factor(round(as.numeric(Measurement), digits=2)),
fill = profiles)) +
geom_bar() +
#geom_text(aes(label=IQR, y=y), x=as.factor("256"), check_overlap = T, size=3) +
labs(x = "MIC (mg/L)", y = "Number of genomes") +
scale_fill_manual(values=c("LightBlue", "LightBlue", "#78638a", rep("IndianRed", 7)))+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 5.5), color = "black") +
geom_vline(aes(xintercept = 4.5), color = "black") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=8),
axis.text.y = element_text(size=7),
legend.position="none") +
facet_wrap(~profiles, scales="free_y", ncol=1)
rules_MIC_distribution_separaterules_DD_distribution_separate <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
mutate(profiles=case_when(rules_category==0 ~ profile_labels["0"],
rules_category==1 ~ profile_labels["1"],
rules_category==2 ~ profile_labels["2"],
rules_category==3 ~ profile_labels["3"],
rules_category==4 ~ profile_labels["4"],
rules_category==5 ~ profile_labels["5"],
rules_category==6 ~ profile_labels["6"],
rules_category==7 ~ profile_labels["7"],
rules_category==8 ~ profile_labels["8"],
rules_category==9 ~ profile_labels["9"],
TRUE ~ NA
)) %>%
mutate(profiles=fct_relevel(profiles, profile_labels)) %>%
ggplot(aes(x = as.numeric(Measurement), fill = profiles)) +
geom_bar() +
labs(y = "Number of genomes",
x="Disk zone (mm)",
title="B) Disk zones, by genotype profile") +
scale_fill_manual(values=unname(profile_colours))+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 21.5), color = "black") +
geom_vline(aes(xintercept = 24.5), color = "black", linetype=2) +
geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=10),
legend.position="none") +
facet_wrap(~profiles, scales="free_y", ncol=2)
rules_DD_distribution_separate